import requests
from lxml import etree
import csv

# Scrape the Douban Top 250 listing and collect one dict per movie
# (title, genre, one-line quote) into list_movie.
list_movie = []
# The request headers never change, so build them once outside the page loop.
head1 = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
}
for page in range(0, 10):
    # The `start` query parameter advances by 25 for each page requested.
    url = f'https://movie.douban.com/top250?start={25 * page}&filter='
    # A timeout keeps a stalled connection from hanging the script forever.
    data1 = requests.get(url, headers=head1, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    data1.raise_for_status()
    html = etree.HTML(data1.text)

    # Walk the list one <li> at a time and query each field relative to that
    # entry. Collecting page-wide parallel lists would shift every following
    # row whenever a single entry lacks one of the fields.
    for item in html.xpath('//*[@id="content"]/div/div[1]/ol/li'):
        # Movie title: the first <span class="title"> of the entry.
        names = item.xpath('./div/div[2]//span[1][@class="title"]/text()')
        if not names:
            # Without a title there is nothing meaningful to record.
            continue

        # Second text node of the info paragraph; its '/'-separated fields
        # end with the genre.
        info = item.xpath('./div/div[2]/div[2]/p[1]/text()[2]')
        # One-line quote; not every entry has one.
        comment = item.xpath('./div/div[2]/div[2]/p[2]/span/text()')

        if info:
            # Take the LAST field rather than index 2: an info line with extra
            # '/'-separated parts (e.g. several release years) would otherwise
            # yield the wrong field, and a shorter line would raise IndexError.
            genre = info[0].replace('\xa0', ' ').split('/')[-1].strip()
        else:
            genre = ''

        list_movie.append({
            '电影名称': names[0],
            '电影类型': genre,
            '电影评论': comment[0] if comment else '无评论',
        })

# Write the collected rows to a CSV file.
# The 'utf-8-sig' codec prepends a BOM, and newline='' leaves line-ending
# handling to the csv module.
head2 = ('电影名称', '电影类型', '电影评论')
with open('dbmovie2503.csv', mode='w', newline='', encoding='utf-8-sig') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=head2)
    writer.writeheader()
    # Emit one CSV line per scraped movie, in collection order.
    for row in list_movie:
        writer.writerow(row)